.libPaths(c("/data/home/lyang/R/x86_64-redhat-linux-gnu-library/4.1",
            "/data/home/bioinfo/R/R4.1.0/library_B3.13",
            "/usr/lib64/R/library","/usr/share/R/library") )
dyn.load("/data/home/bioinfo/programs/hdf5-1.12.0/hdf5/lib/libhdf5_hl.so.200")
knitr::opts_knit$set(root.dir = "/data/home/lyang/Visium_spotlight")
setwd("/data/home/lyang/Visium_spotlight")

Load packages

# library(Matrix)
library(data.table)
library(Seurat)
library(SeuratDisk)
library(dplyr)


dataset_name = "cell2location34"
# in_house  cell2location34
sc_dataset_file = paste(dataset_name,"sc_dataset.rds",sep ="_")
rds_file= sc_dataset_file
sc_seu <- readRDS(rds_file)
  
rds_file='Visium_spatial.rds'
sp_seu <- readRDS(rds_file)

Quick data exploration:

# "cell2location34" Tabula in_house
dataset_name = "cell2location34"
sc_dataset = sc_seu
if(dataset_name == "cell2location34"){

  cell_type_table = as.data.frame(table(sc_dataset$Subset))

}else if(dataset_name == "in_house"){
  cell_type_table = as.data.frame(table(sc_dataset$blue.main))
}else if(dataset_name == "cell2location44"){
  cell_type_table = as.data.frame(table(sc_dataset$CellType))
}




colnames(cell_type_table) = c("cell_type","frequency")
library(ggplot2)
# Basic barplot
p <- ggplot(data=cell_type_table, aes(x=cell_type, y=frequency,fill=cell_type)) +
  geom_bar(stat="identity")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))+
       geom_text(aes(label=frequency), position=position_dodge(width=0.9), vjust=-0.25)


# ggsave(p,filename = "cell_type_histogram.pdf",width = 15,
#        height = 20)

print(p)

getwd()
## [1] "/data/home/lyang/Visium_spotlight"
if(file.exists("old_version_ls.rds")){
  rds_file= "old_version_ls.rds"
  spotlight_ls <- readRDS(rds_file)
  nmf_mod <- spotlight_ls[[1]]
  decon_mtrx <- spotlight_ls[[2]]
  }else{
sc_seu <- SCTransform(sc_seu, verbose = FALSE) %>% RunPCA(verbose = FALSE)
sp_seu <- SCTransform(sp_seu,assay = "Spatial", verbose = FALSE) %>% RunPCA(verbose = FALSE)

#### Extract the top marker genes from each cluster ####
Seurat::Idents(object = sc_seu) <- sc_seu@meta.data$Subset
cluster_markers_all <- Seurat::FindAllMarkers(object = sc_seu, 
                                              assay = "RNA",
                                              slot = "data",
                                              verbose = TRUE, 
                                              only.pos = TRUE, 
                                              logfc.threshold = 1,
                                              min.pct = 0.9)
set.seed(123)
setwd("/data/home/lyang/Visium_spotlight")

start_time <- Sys.time()
library(SPOTlight, lib.loc="/data/home/bioinfo/R/R4.1.0/library_B3.13/old_versions/")
spotlight_ls <- spotlight_deconvolution(se_sc = sc_seu,
                                      counts_spatial = sp_seu@assays$Spatial@counts,
                                      clust_vr = "Subset",
                                      cluster_markers = cluster_markers_all,
                                      cl_n = 100, # 100 by default
                                      hvg = 5000,
                                      ntop = NULL,
                                      transf = "uv",
                                      method = "nsNMF",
                                      min_cont = 0.09)
saveRDS(spotlight_ls,"old_version_ls.rds")
end_time <- Sys.time()
end_time - start_time
    
  }
unloadNamespace("SPOTlight")
library(SPOTlight, lib.loc="/data/home/bioinfo/R/R4.1.0/library_B3.13/old_versions/")
h <- NMF::coef(nmf_mod[[1]])
rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
  h = h,
  train_cell_clust = nmf_mod[[2]])

topic_profile_plts[[2]] + ggplot2::theme(
  axis.text.x = ggplot2::element_text(angle = 90), 
  axis.text = ggplot2::element_text(size = 12))

topic_profile_plts[[1]] + theme(axis.text.x = element_text(angle = 90), 
                                axis.text = element_text(size = 12))

# change the cell type names in sp_seu, replace _ with . to keep names coincide between sp_seu and decon_mtrx
decon_mtrx_sub <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]
decon_mtrx_sub[decon_mtrx_sub < 0.08] <- 0
decon_mtrx <- cbind(decon_mtrx_sub, "res_ss" = decon_mtrx[, "res_ss"])
rownames(decon_mtrx) <- colnames(sp_seu)

decon_df <- decon_mtrx %>%
  data.frame() %>%
  tibble::rownames_to_column("barcodes")

sp_seu@meta.data <- sp_seu@meta.data %>%
  tibble::rownames_to_column("barcodes") %>%
  dplyr::left_join(decon_df, by = "barcodes") %>%
  tibble::column_to_rownames("barcodes")

norm_weights_matrix = decon_mtrx

# Seurat::SpatialFeaturePlot(
#   object = sp_seu,
#   features = c("B.Cycling", "B.GC.DZ"),
#   alpha = c(0.1, 1))
cell_types_all <- colnames(decon_mtrx)[which(colnames(decon_mtrx) != "res_ss")]
getwd()
## [1] "/data/home/lyang/Visium_spotlight"
# install.packages("imager")
# SPOTlight::spatial_scatterpie(se_obj = sp_seu,
#                               cell_types_all = cell_types_all,
#                               img_path = "tissue_lowres_image.png",
#                               pie_scale = 0.4)
# 
# 
# SPOTlight::spatial_scatterpie(se_obj = sp_seu,
#                               cell_types_all = cell_types_all,
#                               img_path = "tissue_lowres_image.png",
#                               cell_types_interest = "B.naive",
#                               pie_scale = 0.4)
# "T_CD4+_naive","FDC","B_naive"
# This can be dynamically visualized with DT as shown below
unloadNamespace("SPOTlight")
library(SPOTlight)

ct <- colnames(decon_mtrx[,-ncol(decon_mtrx)])
decon_mtrx[decon_mtrx < 0.1] <- 0


library(RColorBrewer)

if(dataset_name == "Tabula"){
  n <- 29
}else if(dataset_name == "cell2location"){
  n <- 44
}else if(dataset_name == "cell2location34"){
  n <- 34
}

  
colrs <- brewer.pal.info[brewer.pal.info$colorblind == TRUE, ]
col_vec = unlist(mapply(brewer.pal, colrs$maxcolors, rownames(colrs)))
set.seed(123)
col <- sample(col_vec, n)


# show T cell and 
# B cell zones and GCs with follicular dendritic cells (FDCs)
# FDC, T_CD4+_naive, B_naive
# Transcriptionally fine subtypes (B_GC_DZ, B_GC_LZ, B_GC_prePB and 
# T_CD4+_TfH_GC); transcriptionally distinct subtypes (B_Cycling and FDC)


paletteMartin <- col

pal <- colorRampPalette(paletteMartin)(length(ct))
names(pal) <- ct
pal_back <- pal


plot_3_region <- function(pal)
{
  for (char in names(pal)) {
  # print(char)
  if(char %in% c("T.CD4..naive","FDC","B.naive") ){
    if(char == "T.CD4..naive")
    {
      pal[char] = "#FFFF00"
    } else if (char == "FDC")
    {
      pal[char] = "#add8e6"
    } else if (char == "B.naive")
    {
      pal[char] = "#FF0000"
    }
    next
  }
  pal[char] = "#00008b"
}

  return(pal)
}
packageVersion("SPOTlight")
## [1] '0.99.4'
pal = plot_3_region(pal_back)
library(ggplot2)
plotSpatialScatterpie(
  x = sp_seu,
  y = decon_mtrx,
  cell_types = colnames(y),
  img = FALSE,
  scatterpie_alpha = 1,
  pie_scale = 0.4) +
  scale_fill_manual(
    values = pal,
    breaks = names(pal))+ scale_y_reverse()

plot_n_cell_type <- function(pal,col_vec,n, cell_type_vector)
{
  for (char in names(pal)) {
  # print(char)
  if(char %in%  cell_type_vector){
    next
  }
  pal[char] = "#00008b"
}

  return(pal)
}

cell_type_vector = c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC",
                 "B.Cycling",  "FDC")

pal = plot_n_cell_type(pal_back,col_vec,6,cell_type_vector)
plotSpatialScatterpie(
  x = sp_seu,
  y = decon_mtrx,
  cell_types = colnames(y),
  img = FALSE,
  scatterpie_alpha = 1,
  pie_scale = 0.4) +
  scale_fill_manual(
    values = pal,
    breaks = names(pal))+ scale_y_reverse()

pal = pal_back
# type(pal)
plotSpatialScatterpie(
  x = sp_seu,
  y = decon_mtrx,
  cell_types = colnames(y),
  img = FALSE,
  scatterpie_alpha = 1,
  pie_scale = 0.4) +
  scale_fill_manual(
    values = pal,
    breaks = names(pal))+ scale_y_reverse()

rds_file='predictions.assay.rds'
predictions.assay <- readRDS(rds_file)
mat = decon_mtrx
spatial = sp_seu
predictions.assay@data <- t(mat)
meta.features <- as.data.frame(colnames(mat) )
rownames(meta.features) = meta.features[,1]
meta.features$`colnames(mat)` =NULL
predictions.assay@meta.features = meta.features


spatial[["predictions"]] <- predictions.assay

DefaultAssay(spatial) <- "predictions"
# table(sc_dataset@meta.data$Subset)


if(dataset_name == "Tabula"){
  
p = SpatialFeaturePlot(spatial, features = c("stromal cell", "t cell"), pt.size.factor = 1.6, ncol = 2, crop = TRUE)
# print(p)
# p = SpatialFeaturePlot(spatial, features = c("naive b cell", "b cell"), pt.size.factor = 1.6, ncol = 2, crop = TRUE)
# print(p)
  
}else if(dataset_name == "cell2location"){
  
 SpatialFeaturePlot(spatial, features = c("T_Treg", "DC_cDC2"), pt.size.factor = 1.6, ncol = 2, crop = TRUE)
# SpatialFeaturePlot(spatial, features = c("DC_cDC1", "DC_cDC2"), pt.size.factor = 1.6, ncol = 2, crop = TRUE)

}else if(dataset_name == "cell2location34"){
  
# detach("package:SpatialExperiment", unload=TRUE)
  
p = SpatialFeaturePlot(spatial, features = c("B.GC.LZ", "T.CD4..TfH.GC","B.GC.prePB","FDC"), pt.size.factor = 1.6, ncol = 4, crop = TRUE) + ggtitle("germinal center light zone")
print(p)
 # + scale_fill_continuous(limits = c(0, 1))

p = SpatialFeaturePlot(spatial, features = c("B.Cycling", "B.GC.DZ"), pt.size.factor = 1.6, ncol = 2, crop = TRUE)+ ggtitle("germinal center dark zone") 
print(p)
SpatialFeaturePlot(spatial, features = c("B.naive", "B.preGC"), pt.size.factor = 1.6, ncol = 2, crop = TRUE) + ggtitle("B follicle + pre GC") 
}

# table(sc_dataset@meta.data$Subset)
# https://github.com/satijalab/seurat/blob/master/R/visualization.R
# Leave out selected cell types from sc reference dataset(B_GC_DZ, B_GC_LZ, B_GC_prePB, T_CD4+_TfH_GC, B_Cycling and FDC, these six subtypes are expected to be present in annotated GC (positive locations) one by one using ‘subset’ command.


# sc_seu <- SCTransform(sc_seu, verbose = FALSE) %>% RunPCA(verbose = FALSE)
# sp_seu <- SCTransform(sp_seu,assay = "Spatial", verbose = FALSE) %>% RunPCA(verbose = FALSE)
# saveRDS(sp_seu,"sp_seu_SCTransform.rds")
# saveRDS(sc_seu,"sc_seu_SCTransform.rds")


rds_file= "sp_seu_SCTransform.rds"
sp_seu <- readRDS(rds_file)
rds_file= "sc_seu_SCTransform.rds"
sc_seu <- readRDS(rds_file)

#### Extract the top marker genes from each cluster ####
# Seurat::Idents(object = sc_seu) <- sc_seu@meta.data$Subset
# cluster_markers_all <- Seurat::FindAllMarkers(object = sc_seu, 
#                                               assay = "RNA",
#                                               slot = "data",
#                                               verbose = TRUE, 
#                                               only.pos = TRUE, 
#                                               logfc.threshold = 1,
#                                               min.pct = 0.9)
# saveRDS(cluster_markers_all,"cluster_markers_all.rds")
# Leave out selected cell types from  marker genes variables

rds_file= "cluster_markers_all.rds"
cluster_markers_all <- readRDS(rds_file)

#  unloadNamespace("SPOTlight") 
# library(SPOTlight, lib.loc="/data/home/bioinfo/R/R4.1.0/library_B3.13/old_versions/")
# for (selected_type in c( "B_GC_LZ", "B_GC_prePB","T_CD4+_TfH_GC",
#                  "B_Cycling",  "FDC") )
# {
#   # selected_type = "B_GC_DZ"
# 
#   spotlight_result_file = paste(dataset_name,"spotlight_deleted", selected_type, ".rds",sep ="_")
#   sc_reference <- subset(sc_seu, subset = Subset != selected_type)
#   
#   #### Extract the top marker genes from each cluster ####
#   Seurat::Idents(object = sc_reference) <- sc_reference@meta.data$Subset
# 
#   set.seed(123)
#   setwd("/data/home/lyang/Visium_spotlight")
# 
#   spotlight_ls <- spotlight_deconvolution(se_sc = sc_reference,
#                                         counts_spatial = sp_seu@assays$Spatial@counts,
#                                         clust_vr = "Subset",
#                                         cluster_markers = cluster_markers_all,
#                                         cl_n = 100, # 100 by default
#                                         hvg = 5000,
#                                         ntop = NULL,
#                                         transf = "uv",
#                                         method = "nsNMF",
#                                         min_cont = 0.09)
# 
#   saveRDS(spotlight_ls,spotlight_result_file)
#   print(spotlight_result_file)
#     
# }
# getwd()
rmsd_summary = rep(0,6)
names(rmsd_summary) =  c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC",
                 "B.Cycling",  "FDC")

manual_GC_annotation = read.csv("/data/home/lyang/scripts_git/manual_GC_annotation.csv")
rds_file = "Visium_spatial.rds"
spatial <- readRDS(rds_file)
spatial@meta.data$Barcode = rownames(spatial@meta.data)
spatial@meta.data = merge(spatial@meta.data, manual_GC_annotation, by = "Barcode", sort = FALSE)

spatial@meta.data[spatial@meta.data$GC == '',"GC"] = "no"

p2 <- SpatialDimPlot(spatial, group.by = "GC") 
# p1 + p2
print(p2)

for (selected_type in  c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC",
                 "B.Cycling",  "FDC") )
{
  # selected_type = "B.GC.DZ"

  spotlight_result_file = paste(dataset_name,"spotlight_deleted", selected_type, ".rds",sep ="_")
  setwd("/data/home/lyang/Visium_spotlight")
  rds_file= spotlight_result_file
  spotlight_ls <- readRDS(rds_file)
  nmf_mod <- spotlight_ls[[1]]
  decon_mtrx <- spotlight_ls[[2]]
  
  
  
  # change the cell type names in sp_seu, replace _ with . to keep names coincide between sp_seu and decon_mtrx
  decon_mtrx_sub <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]
  decon_mtrx_sub[decon_mtrx_sub < 0.08] <- 0
  decon_mtrx <- cbind(decon_mtrx_sub, "res_ss" = decon_mtrx[, "res_ss"])
  rownames(decon_mtrx) <- colnames(sp_seu)
  
  decon_df <- decon_mtrx %>%
    data.frame() %>%
    tibble::rownames_to_column("barcodes")
  
  sp_seu@meta.data <- sp_seu@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(decon_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")  
  
  matrix_result_complete = norm_weights_matrix[,-ncol(norm_weights_matrix)]

  matrix_result_deleted = as.data.frame(decon_mtrx[,-ncol(decon_mtrx)])
  matrix_result_deleted$selected_type = rep(0,nrow(matrix_result_deleted))
  selected_column = matrix_result_complete[,selected_type]
  matrix_result_complete = as.data.frame(matrix_result_complete)
  df = matrix_result_complete[,!(names(matrix_result_complete) == selected_type)]
  df$selected_type = selected_column
  matrix_result_complete = as.matrix(df)
  matrix_result_deleted = as.matrix(matrix_result_deleted)
  
  
  # for each spatial spot, Calculate the root-mean-square error (RMSE) distance between the predicted proportion under different reference dataset situations
  # load("/data/home/lyang/Visium_RCTD/.RData")
  
  
  matrix_substrated = matrix_result_complete - matrix_result_deleted
  rmsd_per_spot = sqrt(rowSums(matrix_substrated^2)/ncol(matrix_substrated))
  rmsd_per_situation = sum(rmsd_per_spot)
  rmsd_summary[selected_type] = rmsd_per_situation
  
  
  
  
  
  
  
  
  
  unloadNamespace("SPOTlight")
  library(SPOTlight)
  
  ct <- colnames(decon_mtrx)
  decon_mtrx[decon_mtrx < 0.1] <- 0
  
  
  # # show T cell and 
  # # B cell zones and GCs with follicular dendritic cells (FDCs)
  # # FDC, T_CD4+_naive, B_naive
  # # Transcriptionally fine subtypes (B_GC_DZ, B_GC_LZ, B_GC_prePB and 
  # # T_CD4+_TfH_GC); transcriptionally distinct subtypes (B_Cycling and FDC)

  library(ggplot2)
  
  plot_n_cell_type <- function(pal,col_vec,n, cell_type_vector)
  {
    for (char in names(pal)) {
    # print(char)
    if(char %in%  cell_type_vector){
      next
    }
    pal[char] = "#00008b"
  }
  
    return(pal)
  }
  
  cell_type_vector = c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC",
                   "B.Cycling",  "FDC")
  
  pal = plot_n_cell_type(pal_back,col_vec,6,cell_type_vector)
  p1 = plotSpatialScatterpie(
    x = sp_seu,
    y = decon_mtrx[,-ncol(decon_mtrx)],
    cell_types = colnames(y),
    img = FALSE,
    scatterpie_alpha = 1,
    pie_scale = 0.4) +
    scale_fill_manual(
      values = pal,
      breaks = names(pal)) + scale_y_reverse()
   print(p1)
}

df <- data.frame(deleted_cell_type=names(rmsd_summary),
                RMSD=rmsd_summary  )
p<-ggplot(data=df, aes(x=deleted_cell_type, y=RMSD)) +
  geom_bar(stat="identity")
p

# Evaluate the area under Precision Recall curve separately for each cell type, then treat all classes equally and take the average across remaining 5 GC cell types
library(ROCR)
average_PR_curve.area = rep(0,7)
names(average_PR_curve.area) = c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC", "B.Cycling",  "FDC","complete")
manual_GC_annotation = read.csv("/data/home/lyang/scripts_git/manual_GC_annotation.csv")

getwd()
## [1] "/data/home/lyang/Visium_spotlight"
for (deleted_type in c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC", "B.Cycling",  "FDC","complete") )
{

  if(deleted_type == "complete"){
    rds_file= "old_version_ls.rds"
  }else{
  spotlight_result_file = paste(dataset_name,"spotlight_deleted", deleted_type, ".rds",sep ="_")
  rds_file= spotlight_result_file
  }
    spotlight_ls <- readRDS(rds_file)
    nmf_mod <- spotlight_ls[[1]]
    decon_mtrx <- spotlight_ls[[2]]  
    
    
  # change the cell type names in sp_seu, replace _ with . to keep names coincide between sp_seu and decon_mtrx
  decon_mtrx_sub <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]
  # decon_mtrx_sub[decon_mtrx_sub < 0.08] <- 0
  decon_mtrx <- cbind(decon_mtrx_sub, "res_ss" = decon_mtrx[, "res_ss"])
  rownames(decon_mtrx) <- colnames(sp_seu)
  
  decon_df <- decon_mtrx %>%
    data.frame() %>%
    tibble::rownames_to_column("barcodes")
  
  sp_seu@meta.data <- sp_seu@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(decon_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")  

  matrix_result_deleted = decon_mtrx[,-ncol(decon_mtrx)]
  sum_PR_curve.area = 0
  # Evaluate the area under Precision Recall curve separately for each cell type
  i = 1
  for (remain_type in c("B.GC.DZ", "B.GC.LZ", "B.GC.prePB","T.CD4..TfH.GC", "B.Cycling",  "FDC") )
{
    if(remain_type == deleted_type){
      next
    }
      print(c("deleted_type",deleted_type,"remain_type",remain_type))
    proportions = as.data.frame(matrix_result_deleted[,remain_type]) 
    colnames(proportions) = remain_type
    manual_GC_annotation[manual_GC_annotation$GC == '',"GC"] = 0
    manual_GC_annotation[manual_GC_annotation$GC == 'GC',"GC"] = 1
    proportions$Barcode = rownames(proportions)
    proportions_label = merge(proportions,manual_GC_annotation,by = "Barcode")
    
    
    pred <- prediction(proportions_label[,2],proportions_label[,3])
    
    PR_curve.area = performance(pred, measure = "aucpr")
    PR_curve.area@y.values[[1]]
    sum_PR_curve.area = PR_curve.area@y.values[[1]] + sum_PR_curve.area
    
    PR_curve = performance(pred, measure = "prec", x.measure = "rec")
    if(i ==1)
  {
  df_all <- data.frame(cell_type=rep(remain_type,each=length(PR_curve@x.values[[1]])), 
  recall=c(PR_curve@x.values[[1]]),
  precision=c(PR_curve@y.values[[1]])) 
  i = i + 1 
    }else{

  df <- data.frame(cell_type=rep(remain_type,each=length(PR_curve@x.values[[1]])), 
  recall=c(PR_curve@x.values[[1]]),
  precision=c(PR_curve@y.values[[1]]))
  df_all =  rbind(df_all,df)    
  i = i + 1 
  }
  }
  library(ggplot2)
  df_all$cell_type = as.factor(df_all$cell_type)
  plt <- ggplot(df_all, aes(x=recall, y=precision, color=cell_type)) + geom_line()+
    ggtitle(deleted_type)
  print(plt)
  
  average_PR_curve.area[deleted_type] = sum_PR_curve.area/5

  
}
## [1] "deleted_type" "B.GC.DZ"      "remain_type"  "B.GC.LZ"     
## [1] "deleted_type" "B.GC.DZ"      "remain_type"  "B.GC.prePB"  
## [1] "deleted_type"  "B.GC.DZ"       "remain_type"   "T.CD4..TfH.GC"
## [1] "deleted_type" "B.GC.DZ"      "remain_type"  "B.Cycling"   
## [1] "deleted_type" "B.GC.DZ"      "remain_type"  "FDC"
## Warning: Removed 5 row(s) containing missing values (geom_path).

## [1] "deleted_type" "B.GC.LZ"      "remain_type"  "B.GC.DZ"     
## [1] "deleted_type" "B.GC.LZ"      "remain_type"  "B.GC.prePB"  
## [1] "deleted_type"  "B.GC.LZ"       "remain_type"   "T.CD4..TfH.GC"
## [1] "deleted_type" "B.GC.LZ"      "remain_type"  "B.Cycling"   
## [1] "deleted_type" "B.GC.LZ"      "remain_type"  "FDC"
## Warning: Removed 5 row(s) containing missing values (geom_path).

## [1] "deleted_type" "B.GC.prePB"   "remain_type"  "B.GC.DZ"     
## [1] "deleted_type" "B.GC.prePB"   "remain_type"  "B.GC.LZ"     
## [1] "deleted_type"  "B.GC.prePB"    "remain_type"   "T.CD4..TfH.GC"
## [1] "deleted_type" "B.GC.prePB"   "remain_type"  "B.Cycling"   
## [1] "deleted_type" "B.GC.prePB"   "remain_type"  "FDC"
## Warning: Removed 5 row(s) containing missing values (geom_path).

## [1] "deleted_type"  "T.CD4..TfH.GC" "remain_type"   "B.GC.DZ"      
## [1] "deleted_type"  "T.CD4..TfH.GC" "remain_type"   "B.GC.LZ"      
## [1] "deleted_type"  "T.CD4..TfH.GC" "remain_type"   "B.GC.prePB"   
## [1] "deleted_type"  "T.CD4..TfH.GC" "remain_type"   "B.Cycling"    
## [1] "deleted_type"  "T.CD4..TfH.GC" "remain_type"   "FDC"
## Warning: Removed 5 row(s) containing missing values (geom_path).

## [1] "deleted_type" "B.Cycling"    "remain_type"  "B.GC.DZ"     
## [1] "deleted_type" "B.Cycling"    "remain_type"  "B.GC.LZ"     
## [1] "deleted_type" "B.Cycling"    "remain_type"  "B.GC.prePB"  
## [1] "deleted_type"  "B.Cycling"     "remain_type"   "T.CD4..TfH.GC"
## [1] "deleted_type" "B.Cycling"    "remain_type"  "FDC"
## Warning: Removed 5 row(s) containing missing values (geom_path).

## [1] "deleted_type" "FDC"          "remain_type"  "B.GC.DZ"     
## [1] "deleted_type" "FDC"          "remain_type"  "B.GC.LZ"     
## [1] "deleted_type" "FDC"          "remain_type"  "B.GC.prePB"  
## [1] "deleted_type"  "FDC"           "remain_type"   "T.CD4..TfH.GC"
## [1] "deleted_type" "FDC"          "remain_type"  "B.Cycling"
## Warning: Removed 5 row(s) containing missing values (geom_path).

## [1] "deleted_type" "complete"     "remain_type"  "B.GC.DZ"     
## [1] "deleted_type" "complete"     "remain_type"  "B.GC.LZ"     
## [1] "deleted_type" "complete"     "remain_type"  "B.GC.prePB"  
## [1] "deleted_type"  "complete"      "remain_type"   "T.CD4..TfH.GC"
## [1] "deleted_type" "complete"     "remain_type"  "B.Cycling"   
## [1] "deleted_type" "complete"     "remain_type"  "FDC"
## Warning: Removed 6 row(s) containing missing values (geom_path).

df <- data.frame(deleted_cell_type=names(average_PR_curve.area),
                average_area_PR_curve=average_PR_curve.area, similarity = "similar"  )
df[df$deleted_cell_type %in% c("B.Cycling" , "FDC"),"similarity"] = "distinct"

ref_line = df["complete",2]
df = df[df$deleted_cell_type!="complete",]

p <-ggplot(data=df, aes(x=deleted_cell_type,y=average_area_PR_curve,fill=similarity)) +  geom_bar(stat="identity") + geom_hline(yintercept=ref_line, linetype="dashed", color = "red")
p

Session information

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ROCR_1.0-11           SPOTlight_0.99.4      RColorBrewer_1.1-2   
##  [4] tidyr_1.2.0           tibble_3.1.6          stringr_1.4.0        
##  [7] bigmemory_4.5.36      Biobase_2.52.0        BiocGenerics_0.38.0  
## [10] ggplot2_3.3.5         dplyr_1.0.8           SeuratDisk_0.0.0.9019
## [13] SeuratObject_4.0.4    Seurat_4.1.0          data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##   [1] NMF_0.23.0                  plyr_1.8.6                 
##   [3] igraph_1.2.11               lazyeval_0.2.2             
##   [5] splines_4.1.0               listenv_0.8.0              
##   [7] scattermore_0.7             GenomeInfoDb_1.28.4        
##   [9] gridBase_0.4-7              digest_0.6.29              
##  [11] foreach_1.5.2               htmltools_0.5.2            
##  [13] fansi_1.0.2                 magrittr_2.0.2             
##  [15] tensor_1.5                  cluster_2.1.2              
##  [17] doParallel_1.0.17           globals_0.14.0             
##  [19] matrixStats_0.61.0          spatstat.sparse_2.1-0      
##  [21] colorspace_2.0-2            ggrepel_0.9.1              
##  [23] xfun_0.29                   RCurl_1.98-1.6             
##  [25] crayon_1.4.2                jsonlite_1.7.3             
##  [27] bigmemory.sri_0.1.3         scatterpie_0.1.7           
##  [29] spatstat.data_2.1-2         survival_3.2-13            
##  [31] zoo_1.8-9                   iterators_1.0.14           
##  [33] glue_1.6.1                  polyclip_1.10-0            
##  [35] registry_0.5-1              gtable_0.3.0               
##  [37] nnls_1.4                    zlibbioc_1.38.0            
##  [39] XVector_0.32.0              leiden_0.3.9               
##  [41] DelayedArray_0.18.0         SingleCellExperiment_1.14.1
##  [43] future.apply_1.8.1          abind_1.4-5                
##  [45] scales_1.1.1                DBI_1.1.2                  
##  [47] rngtools_1.5.2              miniUI_0.1.1.1             
##  [49] Rcpp_1.0.8                  viridisLite_0.4.0          
##  [51] xtable_1.8-4                reticulate_1.24            
##  [53] spatstat.core_2.3-2         bit_4.0.4                  
##  [55] stats4_4.1.0                htmlwidgets_1.5.4          
##  [57] httr_1.4.2                  ellipsis_0.3.2             
##  [59] ica_1.0-2                   pkgconfig_2.0.3            
##  [61] farver_2.1.0                sass_0.4.0                 
##  [63] uwot_0.1.11                 deldir_1.0-6               
##  [65] utf8_1.2.2                  tidyselect_1.1.1           
##  [67] labeling_0.4.2              rlang_1.0.1                
##  [69] reshape2_1.4.4              later_1.3.0                
##  [71] munsell_0.5.0               tools_4.1.0                
##  [73] cli_3.1.1                   generics_0.1.2             
##  [75] ggridges_0.5.3              evaluate_0.14              
##  [77] fastmap_1.1.0               yaml_2.2.2                 
##  [79] goftest_1.2-3               knitr_1.37                 
##  [81] bit64_4.0.5                 fitdistrplus_1.1-6         
##  [83] purrr_0.3.4                 RANN_2.6.1                 
##  [85] pbapply_1.5-0               future_1.23.0              
##  [87] nlme_3.1-152                mime_0.12                  
##  [89] hdf5r_1.3.5                 compiler_4.1.0             
##  [91] rstudioapi_0.13             plotly_4.10.0              
##  [93] png_0.1-7                   spatstat.utils_2.3-0       
##  [95] tweenr_1.0.2                bslib_0.3.1                
##  [97] stringi_1.7.6               highr_0.9                  
##  [99] lattice_0.20-44             Matrix_1.4-0               
## [101] vctrs_0.3.8                 pillar_1.7.0               
## [103] lifecycle_1.0.1             spatstat.geom_2.3-1        
## [105] lmtest_0.9-39               jquerylib_0.1.4            
## [107] RcppAnnoy_0.0.19            bitops_1.0-7               
## [109] cowplot_1.1.1               irlba_2.3.5                
## [111] GenomicRanges_1.44.0        httpuv_1.6.5               
## [113] patchwork_1.1.1             R6_2.5.1                   
## [115] promises_1.2.0.1            KernSmooth_2.23-20         
## [117] gridExtra_2.3               IRanges_2.26.0             
## [119] parallelly_1.30.0           codetools_0.2-18           
## [121] MASS_7.3-54                 assertthat_0.2.1           
## [123] SummarizedExperiment_1.22.0 pkgmaker_0.32.2            
## [125] withr_2.4.3                 sctransform_0.3.3          
## [127] GenomeInfoDbData_1.2.6      S4Vectors_0.30.2           
## [129] mgcv_1.8-38                 ggfun_0.0.5                
## [131] grid_4.1.0                  rpart_4.1-15               
## [133] rmarkdown_2.11              MatrixGenerics_1.4.3       
## [135] Rtsne_0.15                  ggforce_0.3.3              
## [137] shiny_1.7.1